{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入需要用到的package\n",
    "import numpy as np\n",
    "import json\n",
    "# 读入训练数据\n",
    "datafile = './datasets/housing.data'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data(datafile):\n",
    "    # 从文件导入数据\n",
    "    data = np.fromfile(datafile, sep=' ')\n",
    "\n",
    "    # 每条数据包括14项，其中前面13项是影响因素，第14项是相应的房屋价格中位数\n",
    "    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \\\n",
    "                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
    "    feature_num = len(feature_names)\n",
    "\n",
    "    # 将原始数据进行Reshape，变成[N, 14]这样的形状\n",
    "    # print(data.shape)\n",
    "    # // 表示整数除法，返回商的整数部分\n",
    "    # reshape([rows, columns]) 将数据重新组织成一个二维数组（矩阵）\n",
    "    data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
    "    # print(data.shape)\n",
    "\n",
    "    # 将原数据集拆分成训练集和测试集\n",
    "    # 这里使用80%的数据做训练，20%的数据做测试\n",
    "    # 测试集和训练集必须是没有交集的\n",
    "    ratio = 0.8\n",
    "    offset = int(data.shape[0] * ratio)\n",
    "    training_data = data[:offset]\n",
    "\n",
    "    # 计算训练集的最大值，最小值，平均值\n",
    "    # 对每个特征进行归一化处理，使得每个特征的取值缩放到0~1之间。这样做有两个好处：一是模型训练更高效；二是特征前的权重大小可以代表该变量对预测结果的贡献度（因为每个特征值本身的范围相同）。\n",
    "    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \\\n",
    "                                 training_data.sum(axis=0) / training_data.shape[0]\n",
    "\n",
    "    # 对数据进行归一化处理\n",
    "    for i in range(feature_num):\n",
    "        #print(maximums[i], minimums[i], avgs[i])\n",
    "        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])\n",
    "\n",
    "    # 训练集和测试集的划分比例\n",
    "    training_data = data[:offset]\n",
    "    test_data = data[offset:]\n",
    "    return training_data, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取数据\n",
    "training_data, test_data = load_data(datafile)\n",
    "x = training_data[:, :-1]\n",
    "y = training_data[:, -1:]\n",
    "\n",
    "# 查看数据\n",
    "print(x[0])\n",
    "print(y[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 训练过程\n",
    "\n",
    "上述计算过程描述了如何构建神经网络，通过神经网络完成预测值和损失函数的计算。接下来介绍如何求解参数$w$和$b$的数值，这个过程也称为模型训练过程。训练过程是深度学习模型的关键要素之一，其目标是让定义的损失函数$Loss$尽可能的小，也就是说找到一个参数解$w$和$b$，使得损失函数取得极小值。\n",
    "\n",
    "$$\\frac{\\partial{L}}{\\partial{w_j}} = \\frac{1}{N}\\sum_{i=1}^N{(z_i - y_i)\\frac{\\partial{z_i}}{\\partial{w_j}}} = \\frac{1}{N}\\sum_{i=1}^N{(z_i - y_i)x_i^{j}}$$\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Network(object):\n",
    "    def __init__(self, num_of_weights):\n",
    "        # 随机产生w的初始值\n",
    "        # 为了保持程序每次运行结果的一致性，此处设置固定的随机数种子\n",
    "        np.random.seed(0)\n",
    "        self.w = np.random.randn(num_of_weights, 1)\n",
    "        self.b = 0.\n",
    "\n",
    "    def forward(self, x):\n",
    "        z = np.dot(x, self.w) + self.b\n",
    "        return z\n",
    "\n",
    "    def loss(self, z, y):\n",
    "        error = z - y\n",
    "        num_samples = error.shape[0]\n",
    "        cost = error * error\n",
    "        cost = np.sum(cost) / num_samples\n",
    "        return cost\n",
    "\n",
    "    def gradient(self, x, y):\n",
    "        z = self.forward(x)\n",
    "        gradient_w = (z-y)*x\n",
    "        gradient_w = np.mean(gradient_w, axis=0)\n",
    "        # 这行代码是将一维数组转换为二维列向量，通过增加一个新的轴（维度）实现的。\n",
    "        # 例如，如果输入的数组是 [1, 2, 3]，那么通过 np.newaxis 插入一个新轴后，结果将是 [[1], [2], [3]]。\n",
    "        gradient_w = gradient_w[:, np.newaxis]\n",
    "        gradient_b = (z - y)\n",
    "        gradient_b = np.mean(gradient_b)\n",
    "        return gradient_w, gradient_b\n",
    "\n",
    "    def update(self, gradient_w, gradient_b, eta = 0.01):\n",
    "        self.w = self.w - eta * gradient_w\n",
    "        self.b = self.b - eta * gradient_b\n",
    "\n",
    "    def train(self, x, y, iterations=100, eta=0.01):\n",
    "        losses = []\n",
    "        for i in range(iterations):\n",
    "            z = self.forward(x)\n",
    "            L = self.loss(z, y)\n",
    "            gradient_w, gradient_b = self.gradient(x, y)\n",
    "            self.update(gradient_w, gradient_b, eta)\n",
    "            losses.append(L)\n",
    "            if (i+1) % 10 == 0:\n",
    "                print('iter {}, loss {}'.format(i, L))\n",
    "        return losses"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 随机梯度下降法（ Stochastic Gradient Descent）\n",
    "\n",
    "在上述程序中，每次损失函数和梯度计算都是基于数据集中的全量数据。对于波士顿房价预测任务数据集而言，样本数比较少，只有404个。但在实际问题中，数据集往往非常大，如果每次都使用全量数据进行计算，效率非常低，通俗地说就是“杀鸡焉用牛刀”。由于参数每次只沿着梯度反方向更新一点点，因此方向并不需要那么精确。一个合理的解决方案是每次从总的数据集中随机抽取出小部分数据来代表整体，基于这部分数据计算梯度和损失来更新参数，这种方法被称作随机梯度下降法（Stochastic Gradient Descent，SGD），核心概念如下：\n",
    "\n",
    "* mini-batch：每次迭代时抽取出来的一批数据被称为一个mini-batch。\n",
    "* batch_size：一个mini-batch所包含的样本数目称为batch_size。\n",
    "* epoch：当程序迭代的时候，按mini-batch逐渐抽取出样本，当把整个数据集都遍历到了的时候，则完成了一轮训练，也叫一个epoch。启动训练时，可以将训练的轮数num_epochs和batch_size作为参数传入。\n",
    "\n",
    "SGD里面是随机抽取一部分样本代表总体。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Network(object):\n",
    "    def __init__(self, num_of_weights):\n",
    "        # 随机产生w的初始值\n",
    "        # 为了保持程序每次运行结果的一致性，此处设置固定的随机数种子\n",
    "        #np.random.seed(0)\n",
    "        self.w = np.random.randn(num_of_weights, 1)\n",
    "        self.b = 0.\n",
    "\n",
    "    def forward(self, x):\n",
    "        z = np.dot(x, self.w) + self.b\n",
    "        return z\n",
    "\n",
    "    def loss(self, z, y):\n",
    "        error = z - y\n",
    "        num_samples = error.shape[0]\n",
    "        cost = error * error\n",
    "        cost = np.sum(cost) / num_samples\n",
    "        return cost\n",
    "\n",
    "    def gradient(self, x, y):\n",
    "        z = self.forward(x)\n",
    "        N = x.shape[0]\n",
    "        gradient_w = 1. / N * np.sum((z-y) * x, axis=0)\n",
    "        gradient_w = gradient_w[:, np.newaxis]\n",
    "        gradient_b = 1. / N * np.sum(z-y)\n",
    "        return gradient_w, gradient_b\n",
    "\n",
    "    def update(self, gradient_w, gradient_b, eta = 0.01):\n",
    "        self.w = self.w - eta * gradient_w\n",
    "        self.b = self.b - eta * gradient_b\n",
    "\n",
    "\n",
    "    def train(self, training_data, num_epochs, batch_size=10, eta=0.01):\n",
    "        n = len(training_data)\n",
    "        losses = []\n",
    "        for epoch_id in range(num_epochs):\n",
    "            # 在每轮迭代开始之前，将训练数据的顺序随机打乱\n",
    "            # 然后再按每次取batch_size条数据的方式取出\n",
    "            np.random.shuffle(training_data)\n",
    "            # 将训练数据进行拆分，每个mini_batch包含batch_size条的数据\n",
    "            mini_batches = [training_data[k:k+batch_size] for k in range(0, n, batch_size)]\n",
    "            for iter_id, mini_batch in enumerate(mini_batches):\n",
    "                #print(self.w.shape)\n",
    "                #print(self.b)\n",
    "                x = mini_batch[:, :-1]\n",
    "                y = mini_batch[:, -1:]\n",
    "                a = self.forward(x)\n",
    "                loss = self.loss(a, y)\n",
    "                gradient_w, gradient_b = self.gradient(x, y)\n",
    "                self.update(gradient_w, gradient_b, eta)\n",
    "                losses.append(loss)\n",
    "                print('Epoch {:3d} / iter {:3d}, loss = {:.4f}'.\n",
    "                                 format(epoch_id, iter_id, loss))\n",
    "\n",
    "        return losses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "# 获取数据\n",
    "train_data, test_data = load_data(datafile)\n",
    "\n",
    "# 创建网络\n",
    "net = Network(13)\n",
    "# 启动训练\n",
    "losses = net.train(train_data, num_epochs=50, batch_size=100, eta=0.1)\n",
    "\n",
    "# 画出损失函数的变化趋势\n",
    "plot_x = np.arange(len(losses))\n",
    "plot_y = np.array(losses)\n",
    "plt.plot(plot_x, plot_y)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cuda11.6_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
